---
title: "results plots"
author: "Yanran"
date: "2022/06/10"
output: html_document
---

### boxplot of 3 coefficients $\beta_0$, $\beta_1$, $\beta_2$

```{r message=FALSE, warning=FALSE}
library(dplyr)
library(reshape2)
library(ggplot2)
library(cowplot)
library(rstudioapi)
library(scales)
library(xtable)

setwd("~/Desktop/Research/Rachel/CT_race_diff_privacy/results_manu")

load("simnum1.RData")
dp_sim <- cars
load("simnum2.RData")
ce_sim <- cars
load("simnum3.RData")
dp0527_sim <- cars
## set working directory to wherever this file is located ##
current_path <- getActiveDocumentContext()$path
setwd(dirname(current_path ))

load("bw_adat.RData")
names(adat)
```


```{r message=FALSE, warning=FALSE}
## coef bias mape##
ce_sim <- ce_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
dp_sim <- dp_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
dp0527_sim <- dp0527_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 

box_compare1 <- ce_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
#box_compare1 <- ce_sim[,c("beta0", "beta1", "beta2")] 
box_compare1 <- box_compare1 %>% mutate(Data="DC") 
#box_compare2 <- dp_sim[,c("beta0", "beta1", "beta2")] 
box_compare2 <- dp_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
box_compare2 <- box_compare2 %>% mutate(Data="DP19")
box_compare3 <- dp0527_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
#box_compare3 <- dp0527_sim[,c("beta0", "beta1", "beta2")] 
box_compare3 <- box_compare3 %>% mutate(Data="DP20")
box_compare <- melt(rbind(box_compare1, box_compare2, box_compare3))

ggplot(box_compare, aes(x=variable,y=value, fill=Data)) + 
    geom_boxplot()
par(mfrow=c(1,3)) 
box0 <- box_compare[box_compare$variable=="beta0_bias",]
b0<- ggplot(box0, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab(NULL)+ggtitle("") +
    geom_boxplot()
box1 <- box_compare[box_compare$variable=="beta1_bias",]
b1<- ggplot(box1, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab(NULL)+ggtitle("") +
    geom_boxplot()
box2 <- box_compare[box_compare$variable=="beta2_bias",]
b2<- ggplot(box2, aes(x=variable,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ggtitle("")+
    geom_boxplot()

require(cowplot)
library(grid)
library(gridExtra)

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  b0+ theme(legend.box.margin = margin(0, 0, 0, 12)))

# add the legend to the row we made earlier. Give it one-third of 
# the width of one plot (via rel_widths).


prow <- plot_grid(
  b0+ theme(legend.position="none"),
  b1+ theme(legend.position="none"),
  b2+ theme(legend.position="none"),
  align = 'vh',
  labels = c("Beta0", "Beta1", "Beta2"),
  hjust = -1,
  nrow = 1
)
#prow

coef_boxplot<-plot_grid(prow,legend, rel_widths = c(3, .4))
coef_boxplot
```

### expected counts scatter plots

```{r scatter plot dp/dp0527 P vs ce Ptrue}
Ptrue <- matrix(adat$exp_counts_ce)
P_dp <- matrix(adat$exp_counts_dp)
colnames(P_dp) <- "DP19"
P_dp0527 <- matrix(adat$exp_counts_dp0527)
colnames(P_dp0527) <- c("DP20")

# scatter plot of P matrix (ce vs. dp)
P_dp_w_GEOID <- cbind(adat[1:2], P_dp, Ptrue) 
P_dp0527_w_GEOID <- cbind(adat[1:2], P_dp0527, Ptrue) 
P2_w_GEOID <- cbind(adat[1:2], P_dp, P_dp0527, Ptrue) 

P_dp_ce <- melt(P2_w_GEOID, id=c("GEOID", "race", "Ptrue"))
names(P_dp_ce)[3:4] <- c("DC", "Data")
P_dp_ce_scatter <- ggplot(data = P_dp_ce, mapping = aes(x = DC, y = value, color=Data)) + geom_point(alpha=0.3) + ylab("Expected DP counts") + xlab("Expected DC counts")
scatter_exp <- P_dp_ce_scatter + facet_wrap(~race,scales="free") + geom_abline(colour="red")

#pdf('scatter_exp.pdf',width=7,height=6)
# P_dp_ce_scatter2 <- ggplot(data = P_dp_ce, mapping = aes(x = Ptrue, y = value, color=race)) + geom_point(alpha=0.3) + ylab("P_dp")+scale_color_brewer(palette="Dark2")
# P_dp_ce_scatter2 + facet_wrap(~variable) + geom_abline(colour="grey")




```

Average difference of expected counts

```{r}
mean(P_dp-Ptrue)
mean(P_dp0527-Ptrue)

# mean and standard deviation of the differences in the CT-level DP and DC expected counts for the NHW population are XX for DP19

summary_text <- adat %>% mutate(diff19 = exp_counts_dp-exp_counts_ce) %>% mutate(diff20 = exp_counts_dp0527-exp_counts_ce)

diff19_w <- summary_text[summary_text$race=="white",]$diff19
diff19_b <- summary_text[summary_text$race=="black",]$diff19

diff20_w <- summary_text[summary_text$race=="white",]$diff20
diff20_b <- summary_text[summary_text$race=="black",]$diff20

huizong = function(d){
  hz <- c(mean(d),sd(d))
  return((hz))
 }

huizong(diff19_w)
huizong(diff19_b)
huizong(diff20_w)
huizong(diff20_b)
```


```{r percent error}
# percent error = (expectedDP - expectedDC)/expectedDC * 100%
summary_text <- adat %>% mutate(DP19 = (exp_counts_dp-exp_counts_ce)/exp_counts_ce) %>% mutate(DP20 = (exp_counts_dp0527-exp_counts_ce)/exp_counts_ce)

box_pe <- melt(summary_text[,c("race", "DP19", "DP20")])

box_pe$race<-mapvalues(box_pe$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))

colnames(box_pe) <- c("race", "Data", "value")

fig1 <- ggplot(box_pe, aes(x=race,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ggtitle("")+ geom_boxplot(outlier.shape = NA)+ coord_cartesian(ylim=c(-1.2, 1.8))+ ylab(NULL) 

fig1_percent_scale <- ggplot(box_pe, aes(x=race,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ggtitle("")+
#    geom_boxplot()+ 
   geom_boxplot(outlier.shape = NA)+ 
  #coord_cartesian(ylim=c(-1.2, 1.8))+ ylab(NULL) +
scale_y_continuous(labels = percent_format(), limits = c(-1.2, 1.8))

pe19_w <- summary_text[summary_text$race=="white",]$DP19
pe19_b <- summary_text[summary_text$race=="black",]$DP19

pe20_w <- summary_text[summary_text$race=="white",]$DP20
pe20_b <- summary_text[summary_text$race=="black",]$DP20


summary_pe_under <- rbind(mean(pe19_w<0), mean(pe19_b<0))%>% cbind( rbind(mean(pe20_w<0), mean(pe20_b<0)))
summary_pe_under <- summary_pe_under*100
colnames(summary_pe_under)<-c("DP19", "DP20")
rownames(summary_pe_under) <- c("NHW", "Black")
xtable(summary_pe_under,digits = 4)

```




### boxplot of comparasion for the SMR estimates using the census denominators with the true SMRs

```{r sim_from_cluster2}
setwd("~/Desktop/Research/Rachel/CT_race_diff_privacy/results_manu")
load("simnum4.RData")
dp0527_fit <- cars
dp0527_fit_mean <- apply(dp0527_fit, 2, mean)
load("simnum5.RData")
dp_fit <- cars
dp_fit_mean <- apply(dp_fit, 2, mean)
load("simnum6.RData")
ce_fit <- cars
ce_fit_mean <- apply(ce_fit, 2, mean)

load("exp_theta4.RData")
true_smrs_dp20 = exp_theta
load("exp_theta5.RData")
true_smrs_dp19 = exp_theta
load("exp_theta6.RData")
true_smrs_ce = exp_theta

# compute smrdps
current_path <- getActiveDocumentContext()$path
setwd(dirname(current_path))
load("bw_adat.RData")

# old dp
exp_counts_dp19 = data.frame(t(matrix(rep(adat$exp_counts_dp,100),c(length(adat$exp_counts_dp),100))))
smrdp19 = dp_fit/exp_counts_dp19
mape_ij_dp19 = colSums(abs((smrdp19-true_smrs_dp19)/true_smrs_dp19))/100
# 2020-05-27 dp
exp_counts_dp20 = data.frame(t(matrix(rep(adat$exp_counts_dp0527,100),c(length(adat$exp_counts_dp0527),100))))
smrdp20 = dp0527_fit/exp_counts_dp20
mape_ij_dp20 = colSums(abs((smrdp20-true_smrs_dp20)/true_smrs_dp20))/100
# ce
exp_counts_ce = data.frame(t(matrix(rep(adat$exp_counts_ce,100),c(length(adat$exp_counts_ce),100))))
smrce = ce_fit/exp_counts_ce
mape_ij_ce = colSums(abs((smrce-true_smrs_ce)/true_smrs_ce))/100


```



```{r MAPE boxplot2}
# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
smr_mape <- cbind(adat[1:2], mape_ij_ce, mape_ij_dp19, mape_ij_dp20) 
smr_mape_box <- smr_mape[,c("GEOID", "race", "mape_ij_ce","mape_ij_dp19","mape_ij_dp20")]
names(smr_mape_box) <- c("GEOID", "race", "DC","DP19","DP20")

melt_mape <- melt(smr_mape_box)
names(melt_mape)<-c("GEOID","race","Data","value")
smr_mape_bw_woo <- ggplot(melt_mape, aes(x=race,y=value, fill=Data)) + 
    geom_boxplot(outlier.shape = NA)+ylab("MAPE") + coord_cartesian(ylim=c(0, 1.2))+ ylab(NULL)+ggtitle("")

# old dp
bias_ij_dp19 = colSums(smrdp19-true_smrs_dp19)/100
# 2020-05-27 dp
bias_ij_dp20 = colSums(smrdp20-true_smrs_dp20)/100
# ce
bias_ij_ce = colSums(smrce-true_smrs_ce)/100

# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
smr_bias <- cbind(adat[1:2], bias_ij_ce, bias_ij_dp19, bias_ij_dp20) 
smr_bias_box <- smr_bias[,c("GEOID", "race", "bias_ij_ce","bias_ij_dp19","bias_ij_dp20")]

smr_bias_bw_woo <- ggplot(melt(smr_bias_box), aes(x=race,y=value, fill=variable)) + 
    geom_boxplot(outlier.shape = NA)+ylab("Bias")+ coord_cartesian(ylim=c(-0.5, 0.8))+ ylab(NULL) 

require(cowplot)
library(grid)
library(gridExtra)

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  smr_mape_bw_woo+ theme(legend.box.margin = margin(0, 0, 0, 12)))

prow <- plot_grid(
  smr_bias_bw_woo+ theme(legend.position="none"),
  smr_mape_bw_woo+ theme(legend.position="none"),
  align = 'vh',
  labels = c("Bias", "MAPE"),
  hjust = -1,
  nrow = 1
)

smr_mape_bias <-cowplot::plot_grid(prow,legend, rel_widths = c(3, .4))
smr_mape_bias

```

### maps of MAPE for 2020-05-27 version

```{r}
#### MAPE0527 plot
load('bw_adat.RData')
adat <- cbind(adat, mape_ij_dp20)[,c(1, 2, 8)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

## extract CT shapefile ##
ma_shp<-tracts(state = 'MA',year=2010)

## merge with pop and rate data ##
ma_shp<-merge(ma_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ma_shp<-st_as_sf(ma_shp)  #1478*21


shp_plot0527<-gather(ma_shp,'race','fit0527_mape',
                 c(mape_ij_dp20_w,mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('mape_ij_dp20_w','mape_ij_dp20_b'),
                         to=c('NHW','Black'))
#quantile(shp_plot0527$fit0527_mape, na.rm = T)
# 0.1589031 0.3059507 0.5038545 0.6941363 1.7476195 
ma_q<-c(0,0.21,0.466, 0.722, 0.978,1.234,1.49,max(shp_plot0527$fit0527_mape,na.rm = T)+1)


shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_mape,breaks=ma_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, MA ##
ma_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "YlOrRd")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.title=element_blank(),text=element_text(size=18))+
  facet_wrap(~ race)

## read in boston shapefiles ##
bos_shp<-st_read('boston_ct/Census_2010_Tracts.shp')
bos_shp<-merge(bos_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

bos_bbox<-as.numeric(st_bbox(bos_shp))

xlim<-c(bos_bbox[1], bos_bbox[1]+.4*(bos_bbox[3]-bos_bbox[1]))
ylim<-c(bos_bbox[2], bos_bbox[2]+.8*(bos_bbox[4]-bos_bbox[2]))

## remove CTs with zero population (these are the islands off the MA coast) ##
#bos_shp<-bos_shp[which(bos_shp$exp_counts_ce_w!="NA" | bos_shp$exp_counts_ce_b!="NA"),]

## long form by race ##
shp_plot0527<-gather(bos_shp,'race','fit0527_mape',
                 c(mape_ij_dp20_w,mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('mape_ij_dp20_w','mape_ij_dp20_b'),
                         to=c('NHW','Black'))

shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_mape,breaks=ma_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, BOS ##  
bos_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  scale_fill_brewer(palette = "YlOrRd")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.title=element_blank(),text=element_text(size=18))+
  facet_wrap(~ race)

m0527<-cowplot::plot_grid(ma_maps0527,bos_maps0527,ncol=1)

## save to pdf ##
pdf('maps_mape0527.pdf',width=7,height=6)
print(m0527)


```






